This vignette walks you through nuclei segmentation of DAPI images from your MERFISH experiment. There are numerous tools available for this purpose, but we will be covering a Python-based model called Cellpose (Stringer et al. 2021).
Type the following into your Anaconda prompt or terminal:
conda create -n scipyenv python=3.9 pandas numpy=1.26.4
conda activate scipyenv
python -m pip install cellpose==3.0.11
where python # prints the path to the Python executable on Windows; on Linux/macOS use `which python` instead
Add the python path to your establishParams() call:
library(masmr)
library(ggplot2)
library(imager)
library(patchwork)
library(RBioFormats)
options(java.parameters = "-Xmx8g")
data_dir <- "~/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/"
out_dir <- "~/OUT"
establishParams(
imageDirs = paste0(data_dir, 'S10/'),
imageFormat = ".ome.tif",
codebookFileName = paste0(data_dir, "codebook/Jin_codebook_B.tsv"),
outDir = out_dir,
dapiDir = paste0(data_dir, "DAPI_1/"),
fpkmFileName = paste0(data_dir, "FPKM_data/Jin_FPKMData_B.tsv"),
nProbesFileName = paste0(data_dir, "ProbesPerGene.csv"),
pythonLocation = "/home/ubuntu/miniconda3/envs/scipyenv/bin/python" # new
)
## Assuming .ome.tif is extension for meta files...
## Warning in establishParams(imageDirs = paste0(data_dir, "S10/"), imageFormat = ".ome.tif", : codebookColumnNames not provided : will provide placeholders...
## resumeMode = TRUE...
## 100 images across 16 cycles ( 4 ON-bit codes )...
readImageMetaData("dapi_dir") #Switch to DAPI images and prepare meta data
## Warning in readImageMetaData("dapi_dir"): Updating params$fov_names
## Warning in readImageMetaData("dapi_dir"): Unsuccessful attempt at correcting placeholder codebook column names (params$codebook_colnames)
## Warning in readImageMetaData("dapi_dir"): bit_name not matching codebookColumnNames: either edit GLOBALCOORD.csv or params$codebook_colnames (ignore this
## warning if codebook not needed, e.g. DAPI)
establishCleanSlate()
## [1] ".Random.seed" "all_markers" "bit_number" "cellExp" "cellIDs_a" "cleanSlate" "clusterInfo"
## [8] "codebook" "codebookColumnNames" "cxWindow" "dapi_files" "data_dir" "distanceMetric" "file"
## [15] "filtout" "forExclusion" "forReg" "fpkm_mat" "gcm" "gcm_mat" "gcm_t"
## [22] "genePalette" "hnnDistBins" "i" "im" "img1" "img2" "imList"
## [29] "imMetricName" "imMetrics" "imsub" "maxClusterSize" "metric" "n" "nuclei_area"
## [36] "out_dir" "p_g" "p0" "p1" "p2" "p3" "p4"
## [43] "params" "PC_dim" "percBlank" "probabilityBLANK" "q_a1" "q_a2" "s10_dirs"
## [50] "s10_folder" "series" "seur_obj" "seur_obj.markers" "spotcalldf" "thresh" "top5_markers"
## [57] "troubleshootPlots"
params$current_fov <- params$fov_names[38]
knitr::kable( head( params$global_coords), format = 'html', booktabs = TRUE )
| x_microns | y_microns | z_microns | image_file | fov | bit_name_full | bit_name_alias | bit_name |
|---|---|---|---|---|---|---|---|
| -2645.753 | 3421.847 | 5678.52 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_000.ome.tif | MMStack_1-Pos_000_000 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
| -2645.753 | 3609.926 | 5678.54 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_001.ome.tif | MMStack_1-Pos_000_001 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
| -2645.753 | 3798.004 | 5678.56 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_002.ome.tif | MMStack_1-Pos_000_002 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
| -2645.753 | 3986.082 | 5678.58 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_003.ome.tif | MMStack_1-Pos_000_003 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
| -2645.753 | 4174.161 | 5678.60 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_004.ome.tif | MMStack_1-Pos_000_004 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
| -2645.753 | 4362.239 | 5678.38 | /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_005.ome.tif | MMStack_1-Pos_000_005 | DAPI_1_dapi | 0_dapi | DAPI_1_dapi |
params$current_fov <- params$fov_names[38]
im <- readImageList( params$current_fov )[[1]]
##
## Reading images...
## 1 of 1...
plot(imager::as.cimg(im))
## Warning in as.cimg.array(im): Assuming third dimension corresponds to time/depth
establishCellSegModel(cellposeModelType = "nuclei")
##
## Preparing cell segmentation model(s)...
## Cellpose packaged model...
## Cell segmentation with CELLPOSE...
cellMask <- runCellSegModel(im, masksOnly = TRUE)
## Running CELLPOSE...
df <- as.data.frame(imager::as.cimg(cellMask))
ggplot() +
geom_raster( data = as.data.frame(imager::as.cimg(im)),
aes(x=x, y=y, fill=value) ) +
scale_fill_gradient(low='black', high='white') +
geom_point( data=df[df$value>0,],
aes(x=x, y=y, colour=factor(value)),
shape = '.') +
scale_colour_viridis_d( option='turbo' ) +
theme_void( base_size = 14 ) +
scale_y_reverse() +
coord_fixed() +
theme(legend.position = 'none')
## Warning in as.cimg.array(im): Assuming third dimension corresponds to time/depth
The default Cellpose segmentation model does not work well for my cortical organoid data. The user is highly encouraged to retrain the default Cellpose model using its graphical user interface (GUI) (https://github.com/MouseLand/cellpose). It takes a few hours and about 5-7 images for the Cellpose cyto segmentation model to start performing at "eye level".
establishCellSegModel(
cellposePreTrainedModel =
'/home/rstudio/CP_model3_newB4_800epoch_lr0pt01_FINAL'
)
##
## Preparing cell segmentation model(s)...
## User-pretrained Cellpose model...
## Cell segmentation with CELLPOSE...
cellMask <- runCellSegModel(im)
## Running CELLPOSE...
df <- as.data.frame(imager::as.cimg(cellMask))
ggplot() +
geom_raster( data = as.data.frame(imager::as.cimg(im)),
aes(x=x, y=y, fill=value) ) +
scale_fill_gradient(low='black', high='white') +
geom_point( data=df[df$value>0,],
aes(x=x, y=y, colour=factor(value)),
shape = '.') +
scale_colour_viridis_d( option='turbo' ) +
theme_void( base_size = 14 ) +
scale_y_reverse() +
coord_fixed() +
theme(legend.position = 'none')
5) Run the custom Cellpose model on all DAPI files
🏁
establishCellSegModel(
cellposePreTrainedModel =
'/home/rstudio/CP_model3_newB4_800epoch_lr0pt01_FINAL'
)
params$verbose <- F
for( fovName in params$fov_names ){
params$current_fov <- fovName
## Report current FOV / iteration
message( paste0(
'\n', fovName, ': ',
which(params$fov_names==fovName), ' of ',
length(params$fov_names), '...' ) )
im <- readImageList( params$current_fov )[[1]]
cellMask <- runCellSegModel(im)
saveCellSegMasks(cellMask, im = im)
}
🎉 Congratulations! You have just finished cell segmentation! We are half-way there to generating a gene-cell matrix.
Advanced usage examples covered in the advanced usage handbook: - Reading in more complicated Z-stack and multiple channel structure in DAPI images. - Preprocessing DAPI images for better cell segmentation (e.g. brightening DAPI images). - Dilating nuclei masks with arbitrary distance to capture cytoplasmic spots. - Applying Stardist cell segmentation model.